/* Session setup: start with nothing in memory, switch off output paging,
   close any log still open from an earlier run (capture swallows the error
   when no log is open), and write logs as plain text. */
clear
set more off
capture log close
set logtype text

/* Directory macros used for input and output paths. */
global fullcount_files "/homes/data/cens1940.work/olivetti/msjohn/dtafiles/output"
global myfiles "/homes/data/cens1940.work/olivetti/lsalisbu"


/* NOTE: This file must be run on the NBER server */


/* Open the text log (overwriting any earlier copy) and load the input
   dataset from the $myfiles directory. */
log using "$myfiles/simulation_prelim.txt", replace
use "$myfiles/ALL_1pct.dta", clear



/* Household identifiers.
   count_hh     : flags one row per distinct hhid_numeric
   grp_hh       : numeric id for each hhid_numeric x stateicp combination
   count_grp_hh : flags one row per distinct grp_hh
   The two tabulations show how many distinct units each definition yields. */
egen count_hh = tag(hhid_numeric)
egen grp_hh = group(hhid_numeric stateicp)
egen count_grp_hh = tag(grp_hh)

tabulate count_hh
tabulate count_grp_hh

/* Convert the string age to a number. With the force option, entries that
   are not numeric come back as missing; any such entry that was not blank
   in the original string is then set to 0. The numeric copy replaces the
   string variable under the same name. */
destring age, generate(test) force
replace test = 0 if missing(test) & age != ""
drop age
rename test age

/* Upper-case copy of the first name. */
generate name = upper(namefrst)

/* Recode the string marital status into numeric codes
   (1 Married, 3 Separated, 4 Divorced, 5 Widowed, 6 Single).
   Strings that match none of the categories stay missing. */
generate temp = .
replace temp = 1 if marst == "Married"
replace temp = 3 if marst == "Separated"
replace temp = 4 if marst == "Divorced"
replace temp = 5 if inlist(marst, "Widowed", "Widow")
replace temp = 6 if marst == "Single"

/* List the original strings that were left uncoded. */
tabulate marst if missing(temp)

/* Swap the numeric version in under the original name and label it. */
drop marst
rename temp marst

label define mlbl 1 "Married" 3 "Separated" 4 "Divorced" 5 "Widowed" 6 "Never Married"
label values marst mlbl

/* Show the raw highest-grade strings before recoding. */
tabulate higrade

/* Map each higrade string to a number of years (0-17). The bare
   "High School" and "College" strings are coded like the 4th year of that
   level, and "0" like "None". Unmatched strings remain missing. */
generate educ_attain = .
replace educ_attain = 0  if inlist(higrade, "None", "0")
replace educ_attain = 1  if higrade == "Elementary school, 1st grade"
replace educ_attain = 2  if higrade == "Elementary school, 2nd grade"
replace educ_attain = 3  if higrade == "Elementary school, 3rd grade"
replace educ_attain = 4  if higrade == "Elementary school, 4th grade"
replace educ_attain = 5  if higrade == "Elementary school, 5th grade"
replace educ_attain = 6  if higrade == "Elementary school, 6th grade"
replace educ_attain = 7  if higrade == "Elementary school, 7th grade"
replace educ_attain = 8  if higrade == "Elementary school, 8th grade"
replace educ_attain = 9  if higrade == "High School, 1st year"
replace educ_attain = 10 if higrade == "High School, 2nd year"
replace educ_attain = 11 if higrade == "High School, 3rd year"
replace educ_attain = 12 if inlist(higrade, "High School, 4th year", "High School")
replace educ_attain = 13 if higrade == "College, 1st year"
replace educ_attain = 14 if higrade == "College, 2nd year"
replace educ_attain = 15 if higrade == "College, 3rd year"
replace educ_attain = 16 if inlist(higrade, "College, 4th year", "College")
replace educ_attain = 17 if higrade == "College, 5th or subsequent year"

/* Check the result and count the rows that could not be coded. */
tabulate educ_attain
count if missing(educ_attain)

drop higrade

/* Write the "child" file: samp1 rows aged 30 to 45 inclusive, keeping only
   sex, name and educ_attain. preserve/restore brings the full dataset back
   afterwards so the rest of the script can continue from it. */
preserve

keep if samp1 == 1 & inrange(age, 30, 45)
keep sex name educ_attain

save "$myfiles/simulation_dataset_2_child.dta", replace

restore
/* Couple sample: restrict to samp2, then pair each married man aged 30-45
   (sex==1) with a married woman aged 30-45 (sex==2) who sits on the very
   next row of the same grp_hh once rows are ordered by hhorder. */
keep if samp2==1

/* stable keeps tied rows in their incoming order, so the adjacency-based
   pairing below gives the same result on every run. */
sort grp_hh hhorder, stable

gen wife_samp = sex==2 & age>=30 & age<=45 & marst==1
gen husb_samp = sex==1 & age>=30 & age<=45 & marst==1 & wife_samp[_n+1]==1 & grp_hh==grp_hh[_n+1]

/* Keep a wife only when the row directly above her is a matched husband.
   The test is !=1 rather than ==0 because husb_samp[_n-1] is missing on the
   first observation: with ==0 a qualifying woman in row 1 would be kept
   without a husband and later reshape into a couple with no husband data. */
replace wife_samp = 0 if husb_samp[_n-1]!=1

keep if wife_samp==1 | husb_samp==1

/* The remaining rows alternate husband, wife, so the running count of
   husbands assigns the same id to both spouses. Unlike seeding the id with
   "in 1", this also works when no rows are left. */
gen long couple_id = sum(husb_samp)

/* Diagnostics: the count should be 0 (no couple spans two households) and
   every couple should contain exactly two rows. */
count if couple_id==couple_id[_n-1] & grp_hh!=grp_hh[_n-1]

egen count_couple = count(couple_id), by(couple_id)
tab count_couple


/* Role within the couple: 1 for rows flagged husb_samp, 2 for rows flagged
   wife_samp, missing otherwise. */
generate spouse_type = cond(wife_samp==1, 2, cond(husb_samp==1, 1, .))

keep name spouse_type couple_id educ_attain

/* One row per couple, with separate education and name columns for each
   spouse. */
reshape wide educ_attain name, i(couple_id) j(spouse_type)

/* Replace the numeric suffixes from reshape (1, 2) with _husb and _wife. */
forvalues j = 1/2 {
    local who : word `j' of husb wife
    rename educ_attain`j' educ_attain_`who'
    rename name`j' name_`who'
}

/* Quartile of years of education, computed separately for each spouse. */
foreach who in husb wife {
    xtile educ_qtile_`who' = educ_attain_`who', nq(4)
}

drop couple_id

save "$myfiles/simulation_dataset_2_adult.dta", replace

log close

/*NEXT: 

(1) Run individual regressions of marital status on (a) years of education; (b) quartile dummies
(2) Record R-squared, share50 
(3) Run pseudo regressions of marital status on (a) years of education; (b) pseudo quartile dummies
(4) Simulation with name assignment from R file

*/
